{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ea6c2ff8",
   "metadata": {},
   "source": [
    "## Pandas通过索引查找数据"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3113204d",
   "metadata": {},
   "source": [
    "内容介绍:索引与python的切片原理相同，生成一个数据视图"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "bce8f5a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "3859d786",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0\n",
      "1    1\n",
      "2    2\n",
      "3    3\n",
      "4    4\n",
      "dtype: int64\n",
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A  B  C\n",
       "a  0  1  2\n",
       "b  3  4  5\n",
       "c  6  7  8"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 示例数据\n",
    "s0 = pd.Series(range(5))\n",
    "print(s0)\n",
    "s1 = pd.Series(range(5),index=['a','b','c','d','e'])\n",
    "print(s1)\n",
    "df0 = pd.DataFrame(np.arange(9).reshape(3,3),index=['a','b','c'],columns=['A','B','C'])\n",
    "df0"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "01b3811c",
   "metadata": {},
   "source": [
    "### 1.Series查找数据的方式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a42eb97c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# （1）使用位置索引查找数据\n",
    "s0[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f0e78647",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# （2）使用标签索引查找数据\n",
    "s1['a']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "8e37250e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b    1\n",
       "c    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# （3）切片索引查找数据,基于位置索引的方式\n",
    "s1[1:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "92dd7d54",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "b    1\n",
       "c    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# （4）切片索引查找数据，基于标签切片进行查找\n",
    "# 标签索引与位置索引不同，前后标签都包含。\n",
    "# 一般python的索引不包含终止部分。但此处的标签索引不同。\n",
    "s1['a':'c']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "ad434e2a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "c    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# （5）不连续索引。双层中括号[]\n",
    "s1[['a','c']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "e6bdcd44",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "c    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# （6）使用位置索引进行不连续取数\n",
    "s1[[0,2]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "c330366a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    False\n",
      "b    False\n",
      "c     True\n",
      "dtype: bool\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "c    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (7)布尔索引。与numpy类似的使用方式\n",
    "print(s1>1)\n",
    "s1[s1>1]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "031a2aa3",
   "metadata": {},
   "source": [
    "### 2. DataFrame索引查找数据\n",
    "\n",
    "对于获取的一维数据，返回的是Series数据类型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "2989a1c8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "b    3\n",
       "c    6\n",
       "Name: A, dtype: int32"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (1)列索引取值\n",
    "df0['A']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "95b155e9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A  C\n",
       "a  0  2\n",
       "b  3  5\n",
       "c  6  8"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (2)列不连续索引取值\n",
    "df0[['A','C']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "20438074",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (3)获取具体的某个元素。实际上先获取列，之后便是Series获取数据的方式\n",
    "df0['A'][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "278e400b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A  B  C\n",
       "a  0  1  2\n",
       "b  3  4  5"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (4)切片的方法。此时，默认获取的是行。\n",
    "df0[:2]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6110f657",
   "metadata": {},
   "source": [
    "### 3.DataFrame高级索引"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2e0129c1",
   "metadata": {},
   "source": [
    "* 3.1 loc标签索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "6e3d7878",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "b    1\n",
       "c    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#loc索引是基于标签名的索引。自定义的索引名。\n",
    "s1.loc['a':'c']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "036cf397",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A  B\n",
       "a  0  1\n",
       "b  3  4"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#loc是基于标签的索引。因此内部的都是标签参数。用逗号分隔的分别是行和列。\n",
    "df0.loc['a':'b','A':'B']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f8461e67",
   "metadata": {},
   "source": [
    "* 3.2 iloc位置索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "a53c404c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s1.iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "8f6e891e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "b    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s1.iloc[0:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "79fc591f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A  B\n",
       "a  0  1\n",
       "b  3  4"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df0.iloc[0:2,0:2]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e8c3f776",
   "metadata": {},
   "source": [
    "* 3.3 ix标签和位置混合索引\n",
    "\n",
    "在0.20.0开始，.ix索引器已被弃用，赞成更加严格.iloc 和.loc索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "fc8653f1",
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'Series' object has no attribute 'ix'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32mC:\\Users\\ADMINI~1\\AppData\\Local\\Temp/ipykernel_7600/2513849275.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m#如果数据中同时存在自动生成的数字索引及标签，不建议使用这种方式\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0ms1\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mix\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mc:\\users\\administrator\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m   5476\u001b[0m         ):\n\u001b[0;32m   5477\u001b[0m             \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 5478\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   5479\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   5480\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m->\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mAttributeError\u001b[0m: 'Series' object has no attribute 'ix'"
     ]
    }
   ],
   "source": [
    "#如果数据中同时存在自动生成的数字索引及标签，不建议使用这种方式\n",
    "s1.ix[0:2]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
